{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 5.1. Knowing Python to write faster code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "l = [random.normalvariate(0,1) for i in range(100000)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def sum1():\n",
    "    # BAD: not Pythonic and slow\n",
    "    res = 0\n",
    "    for i in range(len(l)):\n",
    "        res = res + l[i]\n",
    "    return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "319.346"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum1()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6.64 ms ± 69.1 µs per loop (mean ± std. dev. of 7 runs,\n",
      "    100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit sum1()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def sum2():\n",
    "    # STILL BAD\n",
    "    res = 0\n",
    "    for x in l:\n",
    "        res = res + x\n",
    "    return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "319.346"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum2()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3.3 ms ± 54.7 µs per loop (mean ± std. dev. of 7 runs,\n",
      "    100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit sum2()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def sum3():\n",
    "    # GOOD\n",
    "    return sum(l)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "319.346"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum3()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "391 µs ± 840 ns per loop (mean ± std. dev. of 7 runs,\n",
      "    1000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit sum3()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "strings = ['%.3f' % x for x in l]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['-0.056', '-0.417', '-0.357']"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "strings[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def concat1():\n",
    "    # BAD: not Pythonic\n",
    "    cat = strings[0]\n",
    "    for s in strings[1:]:\n",
    "        cat = cat + ', ' + s\n",
    "    return cat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'-0.056, -0.417, -0.357, '"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "concat1()[:24]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.31 s ± 12.1 ms per loop (mean ± std. dev. of 7 runs,\n",
      "    1 loop each)\n"
     ]
    }
   ],
   "source": [
    "%timeit concat1()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "def concat2():\n",
    "    # GOOD\n",
    "    return ', '.join(strings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'-0.056, -0.417, -0.357, '"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "concat2()[:24]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "797 µs ± 13.7 µs per loop (mean ± std. dev. of 7 runs,\n",
      "    1000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit concat2()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "l = [random.randint(0, 100) for _ in range(100000)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "def hist1():\n",
    "    # BAD\n",
    "    count = {}\n",
    "    for x in l:\n",
    "        # We need to initialize every number\n",
    "        # the first time it appears in the list.\n",
    "        if x not in count:\n",
    "            count[x] = 0\n",
    "        count[x] += 1\n",
    "    return count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0: 979,\n",
       " 1: 971,\n",
       " 2: 990,\n",
       " ...\n",
       " 99: 995,\n",
       " 100: 1009}"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hist1()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8.7 ms ± 27.6 µs per loop (mean ± std. dev. of 7 runs,\n",
      "    100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit hist1()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import defaultdict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "def hist2():\n",
    "    # BETTER\n",
    "    count = defaultdict(int)\n",
    "    for x in l:\n",
    "        # The key is created and the value\n",
    "        # initialized at 0 when needed.\n",
    "        count[x] += 1\n",
    "    return count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "defaultdict(int,\n",
       "            {0: 979,\n",
       "             1: 971,\n",
       "             ...\n",
       "             99: 995,\n",
       "             100: 1009})"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hist2()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6.82 ms ± 217 µs per loop (mean ± std. dev. of 7 runs,\n",
      "    100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit hist2()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "def hist3():\n",
    "    # GOOD\n",
    "    return Counter(l)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Counter({0: 979,\n",
       "         1: 971,\n",
       "         ...\n",
       "         99: 995,\n",
       "         100: 1009})"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hist3()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3.69 ms ± 105 µs per loop (mean ± std. dev. of 7 runs,\n",
      "    100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit hist3()"
   ]
  }
 ],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 2
}
